To run this code in my project using the renv environment, run the following lines of code
install.packages("renv") #install the package on the new computer (may not be necessary if renv bootstraps itself as expected)
renv::restore() #reinstall all the package versions in the renv lockfile
library("ViSEAGO")
##
## Warning: replacing previous import 'data.table::set' by 'dendextend::set' when
## loading 'ViSEAGO'
require("topGO")
## Loading required package: topGO
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, aperm, append, as.data.frame, basename, cbind,
## colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
## get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
## match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
## Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
## table, tapply, union, unique, unsplit, which.max, which.min
## Loading required package: graph
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: GO.db
## Loading required package: AnnotationDbi
## Loading required package: stats4
## Loading required package: IRanges
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:utils':
##
## findMatches
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
## Loading required package: SparseM
## Warning: package 'SparseM' was built under R version 4.3.3
##
## groupGOTerms: GOBPTerm, GOMFTerm, GOCCTerm environments built.
##
## Attaching package: 'topGO'
## The following object is masked from 'package:IRanges':
##
## members
require("tidyverse")
## Loading required package: tidyverse
## Warning: package 'tibble' was built under R version 4.3.3
## Warning: package 'lubridate' was built under R version 4.3.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.2
## ✔ ggplot2 4.0.0 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ lubridate::%within%() masks IRanges::%within%()
## ✖ ggplot2::annotate() masks ViSEAGO::annotate()
## ✖ stringr::boundary() masks graph::boundary()
## ✖ dplyr::collapse() masks IRanges::collapse()
## ✖ dplyr::combine() masks Biobase::combine(), BiocGenerics::combine()
## ✖ dplyr::desc() masks IRanges::desc()
## ✖ tidyr::expand() masks S4Vectors::expand()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::first() masks S4Vectors::first()
## ✖ dplyr::lag() masks stats::lag()
## ✖ ggplot2::Position() masks BiocGenerics::Position(), base::Position()
## ✖ purrr::reduce() masks IRanges::reduce()
## ✖ dplyr::rename() masks S4Vectors::rename()
## ✖ lubridate::second() masks S4Vectors::second()
## ✖ lubridate::second<-() masks S4Vectors::second<-()
## ✖ dplyr::select() masks AnnotationDbi::select()
## ✖ dplyr::slice() masks IRanges::slice()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
#BiocManager::install("GO.db")
sessionInfo() #provides list of loaded packages and version of R.
## R version 4.3.2 (2023-10-31)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Ventura 13.0
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.11.0
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: America/New_York
## tzcode source: internal
##
## attached base packages:
## [1] stats4 stats graphics grDevices datasets utils methods
## [8] base
##
## other attached packages:
## [1] lubridate_1.9.4 forcats_1.0.0 stringr_1.5.2
## [4] dplyr_1.1.4 purrr_1.1.0 readr_2.1.5
## [7] tidyr_1.3.1 tibble_3.3.0 ggplot2_4.0.0
## [10] tidyverse_2.0.0 topGO_2.54.0 SparseM_1.84-2
## [13] GO.db_3.18.0 AnnotationDbi_1.64.1 IRanges_2.36.0
## [16] S4Vectors_0.40.2 Biobase_2.62.0 graph_1.80.0
## [19] BiocGenerics_0.48.1 ViSEAGO_1.16.0
##
## loaded via a namespace (and not attached):
## [1] RColorBrewer_1.1-3 rstudioapi_0.17.1 jsonlite_2.0.0
## [4] magrittr_2.0.3 farver_2.1.2 rmarkdown_2.29
## [7] fs_1.6.6 zlibbioc_1.48.2 vctrs_0.6.5
## [10] memoise_2.0.1 RCurl_1.98-1.17 webshot_0.5.5
## [13] htmltools_0.5.8.1 progress_1.2.3 dynamicTreeCut_1.63-1
## [16] curl_7.0.0 sass_0.4.10 bslib_0.9.0
## [19] htmlwidgets_1.6.4 plyr_1.8.9 plotly_4.11.0
## [22] cachem_1.1.0 igraph_2.1.4 lifecycle_1.0.4
## [25] iterators_1.0.14 pkgconfig_2.0.3 Matrix_1.6-5
## [28] R6_2.6.1 fastmap_1.2.0 GenomeInfoDbData_1.2.11
## [31] digest_0.6.37 colorspace_2.1-1 RSQLite_2.4.3
## [34] seriation_1.5.8 filelock_1.0.3 timechange_0.3.0
## [37] httr_1.4.7 compiler_4.3.2 bit64_4.6.0-1
## [40] withr_3.0.2 S7_0.2.0 BiocParallel_1.36.0
## [43] viridis_0.6.5 DBI_1.2.3 UpSetR_1.4.0
## [46] heatmaply_1.6.0 dendextend_1.19.1 R.utils_2.13.0
## [49] biomaRt_2.58.2 rappdirs_0.3.3 tools_4.3.2
## [52] R.oo_1.27.1 glue_1.8.0 DiagrammeR_1.0.11
## [55] GOSemSim_2.28.1 grid_4.3.2 fgsea_1.28.0
## [58] generics_0.1.4 gtable_0.3.6 tzdb_0.5.0
## [61] R.methodsS3_1.8.2 ca_0.71.1 data.table_1.17.8
## [64] hms_1.1.3 xml2_1.4.0 XVector_0.42.0
## [67] foreach_1.5.2 pillar_1.11.0 yulab.utils_0.2.1
## [70] BiocFileCache_2.10.2 lattice_0.22-7 renv_1.1.5
## [73] bit_4.6.0 tidyselect_1.2.1 registry_0.5-1
## [76] Biostrings_2.70.3 knitr_1.50 gridExtra_2.3
## [79] xfun_0.53 matrixStats_1.5.0 DT_0.34.0
## [82] visNetwork_2.1.4 stringi_1.8.7 lazyeval_0.2.2
## [85] yaml_2.3.10 evaluate_1.0.5 codetools_0.2-20
## [88] BiocManager_1.30.26 cli_3.6.5 jquerylib_0.1.4
## [91] Rcpp_1.1.0 GenomeInfoDb_1.38.8 dbplyr_2.5.1
## [94] png_0.1-8 XML_3.99-0.19 parallel_4.3.2
## [97] assertthat_0.2.1 blob_1.2.4 prettyunits_1.2.0
## [100] AnnotationForge_1.44.0 bitops_1.0-9 viridisLite_0.4.2
## [103] scales_1.4.0 crayon_1.5.3 rlang_1.1.6
## [106] cowplot_1.2.0 fastmatch_1.1-6 KEGGREST_1.42.0
## [109] TSP_1.2-5
I am going to perform functional enrichment of GO terms using ViSEAGO.
I am following this vignette: http://bioconductor.unipi.it/packages/devel/bioc/vignettes/ViSEAGO/inst/doc/ViSEAGO.html.
In the next chunk I am loading in my DESeq data. These results are ordered by adjusted p-value. As a reminder, negative LFC = higher in Oral tissue, and positive LFC = higher in Aboral tissue.
#load in DESeq results
DESeq <- read.csv("../output_RNA/differential_expression/DESeq_results.csv", header = TRUE) %>% dplyr::rename("query" ="X")
#make dataframes of just differentially expressed genes for each LFC direction - filtering a little more stringent, abs(LFC) >2
DE_05_Aboral <- DESeq %>% filter(padj < 0.05 & log2FoldChange > 1)
DE_05_OralEpi <- DESeq %>% filter(padj < 0.05& log2FoldChange < -1)
#load in annotation data
annot_tab <- read.delim("../references/annotation/protein-GO.tsv") %>% dplyr::rename(GOs = GeneOntologyIDs)
#filter annotation data for just expressed genes with GO annotations
annot_tab <- annot_tab %>% filter(query %in% DESeq$query)
annot_tab$GOs <- gsub("; ", ";", annot_tab$GOs)
annot_tab$GOs[annot_tab$GOs==""] <- NA
annot_tab <- annot_tab %>% filter(!is.na(GOs))
nrow(annot_tab)
## [1] 10638
nrow(annot_tab)/nrow(DESeq)
## [1] 0.7354812
10638/14464 genes in our dataset have GO information in this file. That is 74%.
sum(annot_tab$query %in% DE_05_Aboral$query)
## [1] 379
sum(annot_tab$query %in% DE_05_Aboral$query)/nrow(DE_05_Aboral)
## [1] 0.6792115
sum(annot_tab$query %in% DE_05_OralEpi$query)
## [1] 796
sum(annot_tab$query %in% DE_05_OralEpi$query)/nrow(DE_05_OralEpi)
## [1] 0.6546053
379/558 genes that are significantly upregulated in the Aboral tissue have annotation information. That is 68% of the genes.
796/1216 genes that are significantly upregulated in the Oral Epidermis tissue have annotation information. That is 71% of the genes.
##Get a list of GO Terms for all genes
annots <- annot_tab %>% dplyr::select(query,GOs) %>% dplyr::rename("GO.terms" = GOs)
# format into the format required by ViSEAGO for custom mappings
Custom_GOs <- annots %>%
# Separate GO terms into individual rows
separate_rows(GO.terms, sep = ";") %>%
# Add necessary columns
mutate(
taxid = "pacuta",
gene_symbol = query,
evidence = "SwissProt"
) %>%
# Rename columns
dplyr::rename(
gene_id = query,
GOID = GO.terms
) %>%
dplyr::select(taxid, gene_id, gene_symbol, GOID, evidence)
Custom_GOs_valid <- Custom_GOs %>% filter(GOID %in% keys(GO.db))
write.table(Custom_GOs_valid, "../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt",row.names = FALSE, sep = "\t", quote = FALSE,col.names=TRUE)
length(unique(Custom_GOs$gene_id))
## [1] 10638
length(unique(Custom_GOs_valid$gene_id))
## [1] 10637
We seem to have lost one gene when filtering for valid GO terms, so I need to account for that below.
Custom_Pacuta <- ViSEAGO::Custom2GO("../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt")
## 'select()' returned 1:1 mapping between keys and columns
myGENE2GO_Pacuta <- ViSEAGO::annotate(
id="pacuta",
Custom_Pacuta
)
selection <- DESeq %>% filter(query %in% Custom_GOs_valid$gene_id) %>%
mutate(DE_05_Aboral = ifelse(query %in% DE_05_Aboral$query, 1,0)) %>%
mutate(DE_05_Oral = ifelse(query %in% DE_05_OralEpi$query, 1,0)) %>%
mutate(expressed = 1)
selection_Aboral <- selection %>% pull(DE_05_Aboral) %>% as.factor()
names(selection_Aboral) <- selection %>% pull(query)
selection_Oral <- selection %>% pull(DE_05_Oral) %>% as.factor()
names(selection_Oral) <- selection %>% pull(query)
expressed <- selection %>% pull(expressed) %>% as.factor()
names(expressed) <- selection %>% pull(query)
# create viseago object
selection <- names(selection_Oral)[selection_Oral==1]
background <- names(expressed)
BP_Oral <- ViSEAGO::create_topGOdata(
geneSel=selection,
allGenes=background,
gene2GO=myGENE2GO_Pacuta,
ont="BP",
nodeSize=5
)
##
## Building most specific GOs .....
## ( 8920 GO terms found. )
##
## Build GO DAG topology ..........
## ( 12838 GO terms and 28523 relations. )
##
## Annotating nodes ...............
## ( 9534 genes annotated to the GO terms. )
# perform TopGO test using classic algorithm
classic_Oral <- topGO::runTest(
BP_Oral,
algorithm ="classic",
statistic = "fisher"
)
##
## -- Classic Algorithm --
##
## the algorithm is scoring 4542 nontrivial nodes
## parameters:
## test statistic: fisher
# perform TopGO test using elim algorithm
elim_Oral <- topGO::runTest(
BP_Oral,
algorithm ="elim",
statistic = "fisher"
)
##
## -- Elim Algorithm --
##
## the algorithm is scoring 4542 nontrivial nodes
## parameters:
## test statistic: fisher
## cutOff: 0.01
##
## Level 18: 2 nodes to be scored (0 eliminated genes)
##
## Level 17: 3 nodes to be scored (0 eliminated genes)
##
## Level 16: 13 nodes to be scored (0 eliminated genes)
##
## Level 15: 37 nodes to be scored (0 eliminated genes)
##
## Level 14: 50 nodes to be scored (12 eliminated genes)
##
## Level 13: 79 nodes to be scored (67 eliminated genes)
##
## Level 12: 166 nodes to be scored (95 eliminated genes)
##
## Level 11: 324 nodes to be scored (116 eliminated genes)
##
## Level 10: 483 nodes to be scored (257 eliminated genes)
##
## Level 9: 610 nodes to be scored (501 eliminated genes)
##
## Level 8: 673 nodes to be scored (577 eliminated genes)
##
## Level 7: 714 nodes to be scored (656 eliminated genes)
##
## Level 6: 636 nodes to be scored (1082 eliminated genes)
##
## Level 5: 408 nodes to be scored (1165 eliminated genes)
##
## Level 4: 234 nodes to be scored (1194 eliminated genes)
##
## Level 3: 92 nodes to be scored (1196 eliminated genes)
##
## Level 2: 17 nodes to be scored (1196 eliminated genes)
##
## Level 1: 1 nodes to be scored (1196 eliminated genes)
BP_Results <- ViSEAGO::merge_enrich_terms(
Input = list(#Oral_classic = c("BP_Oral", "classic_Oral"),
Oral_elim = c("BP_Oral", "elim_Oral"))
)
## 'select()' returned 1:1 mapping between keys and columns
# display the merged table
BP_Results
## - object class: enrich_GO_terms
## - ontology: BP
## - method: topGO
## - summary:
## Oral_elim
## BP_Oral
## description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
## available_genes: 10637
## available_genes_significant: 796
## feasible_genes: 9534
## feasible_genes_significant: 688
## genes_nodeSize: 5
## nodes_number: 6852
## edges_number: 14732
## elim_Oral
## description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
## test_name: fisher p<0.01
## algorithm_name: elim
## GO_scored: 6852
## GO_significant: 58
## feasible_genes: 9534
## feasible_genes_significant: 688
## genes_nodeSize: 5
## Nontrivial_nodes: 4542
## - enrichment pvalue cutoff:
## Oral_elim : 0.01
## - enrich GOs (in at least one list): 58 GO terms of 1 conditions.
## Oral_elim : 58 terms
ViSEAGO::show_table(BP_Results)
# print the merged table in a file
ViSEAGO::show_table(
BP_Results,
"../output_RNA/differential_expression/semantic-enrichment/DE_05_Oral.csv"
)
# initialize
myGOs<-ViSEAGO::build_GO_SS(
gene2GO=myGENE2GO_Pacuta,
enrich_GO_terms=BP_Results
)
## 'select()' returned 1:1 mapping between keys and columns
myGOs <- ViSEAGO::compute_SS_distances(
myGOs,
distance=c("Wang")
)
myGOs
## - object class: GO_SS
## - ontology: BP
## - method: topGO
## - summary:
## Oral_elim
## BP_Oral
## description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
## available_genes: 10637
## available_genes_significant: 796
## feasible_genes: 9534
## feasible_genes_significant: 688
## genes_nodeSize: 5
## nodes_number: 6852
## edges_number: 14732
## elim_Oral
## description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
## test_name: fisher p<0.01
## algorithm_name: elim
## GO_scored: 6852
## GO_significant: 58
## feasible_genes: 9534
## feasible_genes_significant: 688
## genes_nodeSize: 5
## Nontrivial_nodes: 4542
## - enrichment pvalue cutoff:
## Oral_elim : 0.01
## - enrich GOs (in at least one list): 58 GO terms of 1 conditions.
## Oral_elim : 58 terms
## - terms distances: Wang
# display MDSplot
ViSEAGO::MDSplot(myGOs,
"GOterms")
# GOterms heatmap with the default parameters
Wang_clusters_wardD2 <- ViSEAGO::GOterms_heatmap(
myGOs,
showIC = TRUE,
showGOlabels = TRUE,
GO.tree = list(
tree = list(distance = "Wang", aggreg.method = "ward.D2"),
cut = list(
dynamic = list(
pamStage = TRUE,
pamRespectsDendro = TRUE,
deepSplit = 2,
minClusterSize = 2
)
)
),
samples.tree = NULL
)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## ℹ The deprecated feature was likely used in the dendextend package.
## Please report the issue at <https://github.com/talgalili/dendextend/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Display the clusters-heatmap
ViSEAGO::show_heatmap(
Wang_clusters_wardD2,
"GOterms")
# Display the clusters-heatmap table
ViSEAGO::show_table(Wang_clusters_wardD2)
# Print the clusters-heatmap table
ViSEAGO::show_table(
Wang_clusters_wardD2,
"../output_RNA/differential_expression/semantic-enrichment/DE_05_Oral_cluster_heatmap_Wang_wardD2.csv"
)
# display colored MDSplot
ViSEAGO::MDSplot(
Wang_clusters_wardD2,
"GOterms")
# calculate semantic similarities between clusters of GO terms
Wang_clusters_wardD2<-ViSEAGO::compute_SS_distances(
Wang_clusters_wardD2,
distance=c("max", "avg","rcmax", "BMA")
)
## 'select()' returned 1:1 mapping between keys and columns
# MDSplot - one point per cluster
ViSEAGO::MDSplot(
Wang_clusters_wardD2,
"GOclusters")
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
# GOclusters heatmap
Wang_clusters_wardD2<-ViSEAGO::GOclusters_heatmap(
Wang_clusters_wardD2,
tree=list(
distance="BMA",
aggreg.method="ward.D2"
)
)
# display the GOClusters heatmap
ViSEAGO::show_heatmap(
Wang_clusters_wardD2,
"GOclusters")
# create viseago object
selection <- names(selection_Aboral)[selection_Aboral==1]
background <- names(expressed)
BP_Aboral <- ViSEAGO::create_topGOdata(
geneSel=selection,
allGenes=background,
gene2GO=myGENE2GO_Pacuta,
ont="BP",
nodeSize=5
)
##
## Building most specific GOs .....
## ( 8920 GO terms found. )
##
## Build GO DAG topology ..........
## ( 12838 GO terms and 28523 relations. )
##
## Annotating nodes ...............
## ( 9534 genes annotated to the GO terms. )
# perform TopGO test using classic algorithm
classic_Aboral <- topGO::runTest(
BP_Aboral,
algorithm ="classic",
statistic = "fisher"
)
##
## -- Classic Algorithm --
##
## the algorithm is scoring 3182 nontrivial nodes
## parameters:
## test statistic: fisher
# perform TopGO test using elim algorithm
elim_Aboral <- topGO::runTest(
BP_Aboral,
algorithm ="elim",
statistic = "fisher"
)
##
## -- Elim Algorithm --
##
## the algorithm is scoring 3182 nontrivial nodes
## parameters:
## test statistic: fisher
## cutOff: 0.01
##
## Level 16: 5 nodes to be scored (0 eliminated genes)
##
## Level 15: 19 nodes to be scored (0 eliminated genes)
##
## Level 14: 24 nodes to be scored (0 eliminated genes)
##
## Level 13: 45 nodes to be scored (175 eliminated genes)
##
## Level 12: 88 nodes to be scored (236 eliminated genes)
##
## Level 11: 181 nodes to be scored (238 eliminated genes)
##
## Level 10: 285 nodes to be scored (291 eliminated genes)
##
## Level 9: 403 nodes to be scored (334 eliminated genes)
##
## Level 8: 477 nodes to be scored (495 eliminated genes)
##
## Level 7: 517 nodes to be scored (752 eliminated genes)
##
## Level 6: 501 nodes to be scored (1055 eliminated genes)
##
## Level 5: 340 nodes to be scored (1864 eliminated genes)
##
## Level 4: 193 nodes to be scored (2380 eliminated genes)
##
## Level 3: 86 nodes to be scored (2456 eliminated genes)
##
## Level 2: 17 nodes to be scored (2617 eliminated genes)
##
## Level 1: 1 nodes to be scored (2617 eliminated genes)
BP_Results <- ViSEAGO::merge_enrich_terms(
Input = list(#Aboral_classic = c("BP_Aboral", "classic_Aboral"),
Aboral_elim = c("BP_Aboral", "elim_Aboral"))
)
## 'select()' returned 1:1 mapping between keys and columns
# display the merged table
BP_Results
## - object class: enrich_GO_terms
## - ontology: BP
## - method: topGO
## - summary:
## Aboral_elim
## BP_Aboral
## description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
## available_genes: 10637
## available_genes_significant: 379
## feasible_genes: 9534
## feasible_genes_significant: 335
## genes_nodeSize: 5
## nodes_number: 6852
## edges_number: 14732
## elim_Aboral
## description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
## test_name: fisher p<0.01
## algorithm_name: elim
## GO_scored: 6852
## GO_significant: 118
## feasible_genes: 9534
## feasible_genes_significant: 335
## genes_nodeSize: 5
## Nontrivial_nodes: 3182
## - enrichment pvalue cutoff:
## Aboral_elim : 0.01
## - enrich GOs (in at least one list): 118 GO terms of 1 conditions.
## Aboral_elim : 118 terms
ViSEAGO::show_table(BP_Results)
# print the merged table in a file
ViSEAGO::show_table(
BP_Results,
"../output_RNA/differential_expression/semantic-enrichment/DE_05_Aboral.csv"
)
# initialize
myGOs<-ViSEAGO::build_GO_SS(
gene2GO=myGENE2GO_Pacuta,
enrich_GO_terms=BP_Results
)
## 'select()' returned 1:1 mapping between keys and columns
myGOs <- ViSEAGO::compute_SS_distances(
myGOs,
distance=c("Wang")
)
myGOs
## - object class: GO_SS
## - ontology: BP
## - method: topGO
## - summary:
## Aboral_elim
## BP_Aboral
## description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
## available_genes: 10637
## available_genes_significant: 379
## feasible_genes: 9534
## feasible_genes_significant: 335
## genes_nodeSize: 5
## nodes_number: 6852
## edges_number: 14732
## elim_Aboral
## description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
## test_name: fisher p<0.01
## algorithm_name: elim
## GO_scored: 6852
## GO_significant: 118
## feasible_genes: 9534
## feasible_genes_significant: 335
## genes_nodeSize: 5
## Nontrivial_nodes: 3182
## - enrichment pvalue cutoff:
## Aboral_elim : 0.01
## - enrich GOs (in at least one list): 118 GO terms of 1 conditions.
## Aboral_elim : 118 terms
## - terms distances: Wang
# display MDSplot
ViSEAGO::MDSplot(myGOs,
"GOterms")
# GOterms heatmap with the default parameters
Wang_clusters_wardD2 <- ViSEAGO::GOterms_heatmap(
myGOs,
showIC = TRUE,
showGOlabels = TRUE,
GO.tree = list(
tree = list(distance = "Wang", aggreg.method = "ward.D2"),
cut = list(
dynamic = list(
pamStage = TRUE,
pamRespectsDendro = TRUE,
deepSplit = 2,
minClusterSize = 2
)
)
),
samples.tree = NULL
)
# Display the clusters-heatmap
ViSEAGO::show_heatmap(
Wang_clusters_wardD2,
"GOterms")
# Display the clusters-heatmap table
ViSEAGO::show_table(Wang_clusters_wardD2)
# Print the clusters-heatmap table
ViSEAGO::show_table(
Wang_clusters_wardD2,
"../output_RNA/differential_expression/semantic-enrichment/DE_05_Aboral_cluster_heatmap_Wang_wardD2.csv"
)
# display colored MDSplot
ViSEAGO::MDSplot(
Wang_clusters_wardD2,
"GOterms")
# calculate semantic similarities between clusters of GO terms
Wang_clusters_wardD2<-ViSEAGO::compute_SS_distances(
Wang_clusters_wardD2,
distance=c("max", "avg","rcmax", "BMA")
)
## 'select()' returned many:1 mapping between keys and columns
# MDSplot - one point per cluster
ViSEAGO::MDSplot(
Wang_clusters_wardD2,
"GOclusters")
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
# GOclusters heatmap
Wang_clusters_wardD2<-ViSEAGO::GOclusters_heatmap(
Wang_clusters_wardD2,
tree=list(
distance="BMA",
aggreg.method="ward.D2"
)
)
# display the GOClusters heatmap
ViSEAGO::show_heatmap(
Wang_clusters_wardD2,
"GOclusters")
BP_Results <- ViSEAGO::merge_enrich_terms(
Input = list(Oral_elim = c("BP_Oral", "elim_Oral"),
Aboral_elim = c("BP_Aboral", "elim_Aboral"))
)
## 'select()' returned 1:1 mapping between keys and columns
# display the merged table
BP_Results
## - object class: enrich_GO_terms
## - ontology: BP
## - method: topGO
## - summary:
## Oral_elim
## BP_Oral
## description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
## available_genes: 10637
## available_genes_significant: 796
## feasible_genes: 9534
## feasible_genes_significant: 688
## genes_nodeSize: 5
## nodes_number: 6852
## edges_number: 14732
## elim_Oral
## description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
## test_name: fisher p<0.01
## algorithm_name: elim
## GO_scored: 6852
## GO_significant: 58
## feasible_genes: 9534
## feasible_genes_significant: 688
## genes_nodeSize: 5
## Nontrivial_nodes: 4542
## Aboral_elim
## BP_Aboral
## description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
## available_genes: 10637
## available_genes_significant: 379
## feasible_genes: 9534
## feasible_genes_significant: 335
## genes_nodeSize: 5
## nodes_number: 6852
## edges_number: 14732
## elim_Aboral
## description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
## test_name: fisher p<0.01
## algorithm_name: elim
## GO_scored: 6852
## GO_significant: 118
## feasible_genes: 9534
## feasible_genes_significant: 335
## genes_nodeSize: 5
## Nontrivial_nodes: 3182
## - enrichment pvalue cutoff:
## Oral_elim : 0.01
## Aboral_elim : 0.01
## - enrich GOs (in at least one list): 176 GO terms of 2 conditions.
## Oral_elim : 58 terms
## Aboral_elim : 118 terms
ViSEAGO::show_table(BP_Results)
# print the merged table in a file
ViSEAGO::show_table(
BP_Results,
"../output_RNA/differential_expression/semantic-enrichment/DE_05.csv"
)
# initialize
myGOs<-ViSEAGO::build_GO_SS(
gene2GO=myGENE2GO_Pacuta,
enrich_GO_terms=BP_Results
)
## 'select()' returned 1:1 mapping between keys and columns
myGOs <- ViSEAGO::compute_SS_distances(
myGOs,
distance=c("Wang")
)
myGOs
## - object class: GO_SS
## - ontology: BP
## - method: topGO
## - summary:
## Oral_elim
## BP_Oral
## description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
## available_genes: 10637
## available_genes_significant: 796
## feasible_genes: 9534
## feasible_genes_significant: 688
## genes_nodeSize: 5
## nodes_number: 6852
## edges_number: 14732
## elim_Oral
## description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
## test_name: fisher p<0.01
## algorithm_name: elim
## GO_scored: 6852
## GO_significant: 58
## feasible_genes: 9534
## feasible_genes_significant: 688
## genes_nodeSize: 5
## Nontrivial_nodes: 4542
## Aboral_elim
## BP_Aboral
## description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
## available_genes: 10637
## available_genes_significant: 379
## feasible_genes: 9534
## feasible_genes_significant: 335
## genes_nodeSize: 5
## nodes_number: 6852
## edges_number: 14732
## elim_Aboral
## description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
## test_name: fisher p<0.01
## algorithm_name: elim
## GO_scored: 6852
## GO_significant: 118
## feasible_genes: 9534
## feasible_genes_significant: 335
## genes_nodeSize: 5
## Nontrivial_nodes: 3182
## - enrichment pvalue cutoff:
## Oral_elim : 0.01
## Aboral_elim : 0.01
## - enrich GOs (in at least one list): 176 GO terms of 2 conditions.
## Oral_elim : 58 terms
## Aboral_elim : 118 terms
## - terms distances: Wang
# display MDSplot
ViSEAGO::MDSplot(myGOs,
"GOterms")
# GOterms heatmap with the default parameters
Wang_clusters_wardD2 <- ViSEAGO::GOterms_heatmap(
myGOs,
showIC = TRUE,
showGOlabels = TRUE,
GO.tree = list(
tree = list(distance = "Wang", aggreg.method = "ward.D2"),
cut = list(
dynamic = list(
pamStage = TRUE,
pamRespectsDendro = TRUE,
deepSplit = 2,
minClusterSize = 4
)
)
),
samples.tree = NULL
)
# Display the clusters-heatmap
ViSEAGO::show_heatmap(
Wang_clusters_wardD2,
"GOterms")
# Display the clusters-heatmap table
ViSEAGO::show_table(Wang_clusters_wardD2)
# Print the clusters-heatmap table
ViSEAGO::show_table(
Wang_clusters_wardD2,
"../output_RNA/differential_expression/semantic-enrichment/DE_05_cluster_heatmap_Wang_wardD2.csv"
)
# display colored MDSplot
ViSEAGO::MDSplot(
Wang_clusters_wardD2,
"GOterms")
# calculate semantic similarities between clusters of GO terms
Wang_clusters_wardD2<-ViSEAGO::compute_SS_distances(
Wang_clusters_wardD2,
distance=c("max", "avg","rcmax", "BMA")
)
## 'select()' returned many:1 mapping between keys and columns
# MDSplot - one point per cluster
ViSEAGO::MDSplot(
Wang_clusters_wardD2,
"GOclusters")
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
# GOclusters heatmap
Wang_clusters_wardD2<-ViSEAGO::GOclusters_heatmap(
Wang_clusters_wardD2,
tree=list(
distance="BMA",
aggreg.method="ward.D2"
)
)
# display the GOClusters heatmap
ViSEAGO::show_heatmap(
Wang_clusters_wardD2,
"GOclusters")
clustered_allDEGs_enrichedGO <- as.data.frame(Wang_clusters_wardD2@enrich_GOs@data)
cluster_map <- data.frame(cluster_full = rownames(as.matrix(slot(Wang_clusters_wardD2,"clusters_dist")[["BMA"]])))
cluster_map <- cluster_map %>%
mutate(
cluster = str_extract(cluster_full, "\\d+"),
Cluster.name = str_replace(cluster_full,".*?_.*?_","")
) %>%
mutate(Cluster.name = paste0("Cluster ", cluster, ": ", Cluster.name)) %>%
select(cluster, Cluster.name)
cluster_map
## cluster Cluster.name
## 1 1 Cluster 1: neuron development
## 2 2 Cluster 2: developmental process
## 3 3 Cluster 3: anatomical structure development
## 4 4 Cluster 4: anatomical structure development
## 5 5 Cluster 5: multicellular organism development
## 6 6 Cluster 6: sensory organ development
## 7 7 Cluster 7: system development
## 8 8 Cluster 8: animal organ development
## 9 9 Cluster 9: transport
## 10 10 Cluster 10: transport
## 11 11 Cluster 11: multicellular organismal process
## 12 12 Cluster 12: nervous system process
## 13 13 Cluster 13: metabolic process
## 14 14 Cluster 14: response to stimulus
## 15 15 Cluster 15: immune effector process
## 16 16 Cluster 16: biological_process
## 17 17 Cluster 17: response to external stimulus
## 18 18 Cluster 18: secretion by cell
## 19 19 Cluster 19: cell-cell signaling
## 20 20 Cluster 20: signaling
## 21 21 Cluster 21: cell adhesion
## 22 22 Cluster 22: regulation of cell adhesion
## 23 23 Cluster 23: cell junction assembly
## 24 24 Cluster 24: external encapsulating structure organization
## 25 25 Cluster 25: biological_process
cluster_map_fixed <- cluster_map %>%
mutate(Cluster.name = str_replace(Cluster.name, "Cluster 2: developmental process", "Cluster 2: cell fate determination"),
Cluster.name = str_replace(Cluster.name, "Cluster 4: anatomical structure development", "Cluster 4: regeneration & tissue morphogenesis"),
Cluster.name = str_replace(Cluster.name, "Cluster 5: multicellular organism development", "Cluster 5: multicellular organism pattern formation"),
Cluster.name = str_replace(Cluster.name, "Cluster 9: transport", "Cluster 9: metal ion transmembrane transport"),
Cluster.name = str_replace(Cluster.name, "Cluster 10: transport", "Cluster 10: amino acid & small molecule transmembrane transport"),
Cluster.name = str_replace(Cluster.name, "Cluster 12: nervous system process", "Cluster 12: sensory perception"),
Cluster.name = str_replace(Cluster.name, "Cluster 16: biological_process", "Cluster 16: regeneration & wound healing"),
Cluster.name = str_replace(Cluster.name, "Cluster 18: secretion by cell", "Cluster 18: neurotransmitter and hormone secretion"),
Cluster.name = str_replace(Cluster.name, "Cluster 24: external encapsulating structure organization", "Cluster 24: extracellular matrix organization"),
Cluster.name = str_replace(Cluster.name, "Cluster 25: biological_process", "Cluster 25: cell organization and homeostasis")
)
clustered_allDEGs_enrichedGO <- clustered_allDEGs_enrichedGO %>%
left_join(cluster_map_fixed, by = c("GO.cluster" = "cluster"))
write.csv(clustered_allDEGs_enrichedGO,"../output_RNA/differential_expression/semantic-enrichment/DE_05_clusters_named.csv")
library(DT)
# Interactive table with scrolling
datatable(clustered_allDEGs_enrichedGO,
options = list(
pageLength = 10, # rows per page
scrollX = TRUE, # horizontal scroll if wide
scrollY = "400px" # vertical scroll
))
library(dplyr)
library(tidyr)
library(ggplot2)
# Create bidirectional data
cluster_data <- clustered_allDEGs_enrichedGO %>%
select(Cluster.name, Oral_elim.Significant_genes, Aboral_elim.Significant_genes) %>%
mutate(
# Clean cluster names
Cluster.name = gsub("^Cluster \\d+:\\s*", "", Cluster.name),
Oral_terms = ifelse(!is.na(Oral_elim.Significant_genes), 1, 0),
Aboral_terms = ifelse(!is.na(Aboral_elim.Significant_genes), 1, 0)
) %>%
group_by(Cluster.name) %>%
summarise(
Oral_terms = sum(Oral_terms),
Aboral_terms = sum(Aboral_terms),
.groups = "drop"
) %>%
filter(Oral_terms + Aboral_terms > 0) %>%
mutate(
deviation = Oral_terms - Aboral_terms,
Aboral_terms = -Aboral_terms # Make negative for bidirectional plot
) %>%
pivot_longer(cols = c("Oral_terms", "Aboral_terms"),
names_to = "Tissue", values_to = "n_terms") %>%
mutate(Tissue = ifelse(Tissue == "Oral_terms", "Oral epidermis", "Aboral"))
# Calculate transition points for dashed lines
transition_data <- cluster_data %>%
distinct(Cluster.name, deviation) %>%
arrange(deviation)
aboral_to_equal <- sum(transition_data$deviation < 0) + 0.5
equal_to_oral <- sum(transition_data$deviation <= 0) + 0.5
# Plot
p <- ggplot(cluster_data, aes(x = reorder(Cluster.name, deviation), y = n_terms, fill = Tissue)) +
geom_col(width = 0.7, alpha = 0.8) +
geom_hline(yintercept = 0, color = "black", linewidth = 0.8) +
geom_vline(xintercept = c(aboral_to_equal, equal_to_oral),
color = "grey60", linetype = "dashed", linewidth = 0.6) +
scale_fill_manual(values = c("Oral epidermis" = "#2E8B57", "Aboral" = "#9370DB")) +
coord_flip() +
theme_minimal() +
theme(
panel.grid.major.y = element_blank(),
panel.grid.minor = element_blank(),
legend.position = "bottom",
legend.title = element_text(face = "bold"),
axis.title = element_text(face = "bold"),
axis.text = element_text(color = "black"),
axis.text.x = element_text(size = 11)
) +
scale_y_continuous(labels = abs) +
labs(
x = "Gene clusters",
y = "Number of enriched GO terms",
title = "Tissue-specific GO term enrichment patterns",
subtitle = "Clusters ordered by net enrichment (Oral vs. Aboral)"
)
ggsave("../output_RNA/differential_expression/semantic-enrichment/GO_enrichment_bidirectional.png", p, width = 10, height = 8, dpi = 300)
print(p)